# Compute two-sample proportion test

# Two-sample test of equal proportions on the raw counts
# (x = successes, n = trials per group), without continuity correction.
res <- prop.test(x = c(154, 240), n = c(403, 403), correct = FALSE)
res

# Take the group proportions from the test result (x / n) instead of
# retyping rounded values, so the effect size always matches the counts.
p1 <- res$estimate[[1]]
p2 <- res$estimate[[2]]

# Compute Cohen's h: difference between the arcsine-transformed proportions,
# h = 2 * asin(sqrt(p1)) - 2 * asin(sqrt(p2)).
# The sign follows the group order: negative when p1 < p2.
cohen_h <- 2 * (asin(sqrt(p1)) - asin(sqrt(p2)))

# Print result
print(cohen_h)

# Install the plotting/CI packages only when they are not already available,
# so re-running the script does not reinstall them (or fail offline).
required_pkgs <- c("ggplot2", "binom")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach each package once
library(ggplot2)
library(binom)

# Create plots

# Per-group counts, listed in the order Control, Inoculation
groups <- c("Control", "Inoculation")
trials <- c(403, 403)       # number of trials
successes <- c(154, 240)    # number of successes

# Point estimates with normal-approximation 95% limits
# (estimate -/+ 1.96 standard errors)
proportions <- successes / trials
stderr <- sqrt(proportions * (1 - proportions) / trials)
half_width <- 1.96 * stderr
ci_low <- proportions - half_width
ci_high <- proportions + half_width

# Assemble the plotting table, then attach the interval columns
df <- data.frame(Group = groups, Proportion = proportions)
df$CI_low <- ci_low
df$CI_high <- ci_high

print(ci_high)

# Bar fill colour per group, keyed by group label
custom_colors <- setNames(c("steelblue", "tomato"), groups)

# Create the plot for T1: one bar per group with CI whiskers, the proportion
# printed above each bar, and a bracket carrying the significance label.
ggplot(df, aes(x = Group, y = Proportion)) +
  # geom_col() takes bar heights directly from the data
  # (replaces geom_bar(stat = "identity"))
  geom_col(aes(fill = Group), width = 0.6, alpha = 0.4, show.legend = FALSE) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2) +
  # vjust lifts the value label above the bar top
  geom_text(aes(label = sprintf("%.3f", Proportion)), vjust = -1.9, size = 5) +
  scale_fill_manual(values = custom_colors) +
  # Upper limit above 1 leaves room for the bracket and its label
  ylim(0, 1.2) +
  # Significance annotation: label, horizontal bar, then left/right ticks
  annotate("text", x = 1.5, y = 1.05, label = "p < 0.01", size = 5, fontface = "bold") +
  annotate("segment", x = 1, xend = 2, y = 1.00, yend = 1.00, size = 0.5) +
  annotate("segment", x = 1, xend = 1, y = 1.00, yend = 0.95, size = 0.5) +
  annotate("segment", x = 2, xend = 2, y = 1.00, yend = 0.95, size = 0.5) +
  ylab("Proportion correct") +
  ggtitle("") +
  theme_minimal(base_size = 14) +
  # Drop all grid lines
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Follow-up study two sample proportion test

# Two-sample test of equal proportions on the raw counts
# (x = successes, n = trials per group), without continuity correction.
res <- prop.test(x = c(190, 162), n = c(432, 244), correct = FALSE)
res

# Take the group proportions from the test result (x / n) instead of
# retyping rounded values, so the effect size always matches the counts.
p1 <- res$estimate[[1]]
p2 <- res$estimate[[2]]

# Compute Cohen's h: difference between the arcsine-transformed proportions,
# h = 2 * asin(sqrt(p1)) - 2 * asin(sqrt(p2)).
# The sign follows the group order: negative when p1 < p2.
cohen_h <- 2 * (asin(sqrt(p1)) - asin(sqrt(p2)))

# Print result
print(cohen_h)


library(ggplot2)
library(binom)

library(ggplot2)

# Create plot for T2

# Per-group counts, listed in the order Control, Inoculation
groups <- c("Control", "Inoculation")
trials <- c(432, 244)       # number of trials
successes <- c(190, 162)    # number of successes

# Point estimates with normal-approximation 95% limits
# (estimate -/+ 1.96 standard errors)
proportions <- successes / trials
stderr <- sqrt(proportions * (1 - proportions) / trials)
half_width <- 1.96 * stderr
ci_low <- proportions - half_width
ci_high <- proportions + half_width

# Assemble the plotting table, then attach the interval columns
df <- data.frame(Group = groups, Proportion = proportions)
df$CI_low <- ci_low
df$CI_high <- ci_high

print(ci_high)

# Bar fill colour per group, keyed by group label
custom_colors <- setNames(c("steelblue", "tomato"), groups)

# Create the plot: one bar per group with CI whiskers, the proportion
# printed above each bar, and a bracket carrying the significance label.
ggplot(df, aes(x = Group, y = Proportion)) +
  # geom_col() takes bar heights directly from the data
  # (replaces geom_bar(stat = "identity"))
  geom_col(aes(fill = Group), width = 0.6, alpha = 0.4, show.legend = FALSE) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2) +
  # vjust lifts the value label above the bar top
  geom_text(aes(label = sprintf("%.3f", Proportion)), vjust = -1.9, size = 5) +
  scale_fill_manual(values = custom_colors) +
  # Upper limit above 1 leaves room for the bracket and its label
  ylim(0, 1.2) +
  # Significance annotation: label, horizontal bar, then left/right ticks
  annotate("text", x = 1.5, y = 1.05, label = "p < 0.01", size = 5, fontface = "bold") +
  annotate("segment", x = 1, xend = 2, y = 1.00, yend = 1.00, size = 0.5) +
  annotate("segment", x = 1, xend = 1, y = 1.00, yend = 0.95, size = 0.5) +
  annotate("segment", x = 2, xend = 2, y = 1.00, yend = 0.95, size = 0.5) +
  ylab("Proportion correct") +
  ggtitle("") +
  theme_minimal(base_size = 14) +
  # Drop all grid lines
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Click behavior two sample proportion test

# Two-sample test of equal proportions on the raw counts
# (x = successes, n = trials per group), without continuity correction.
res <- prop.test(x = c(127, 147), n = c(119101, 47623), correct = FALSE)
res

# Take the group proportions from the test result (x / n) instead of
# retyping rounded values, so the effect size always matches the counts.
p1 <- res$estimate[[1]]
p2 <- res$estimate[[2]]

# Compute Cohen's h: difference between the arcsine-transformed proportions,
# h = 2 * asin(sqrt(p1)) - 2 * asin(sqrt(p2)).
# The sign follows the group order: negative when p1 < p2.
cohen_h <- 2 * (asin(sqrt(p1)) - asin(sqrt(p2)))

# Print result
print(cohen_h)

# Create plot for click behavior T1

# Counts per group, listed in the order Control, Inoculation
groups <- c("Control", "Inoculation")
trials <- c(119101, 47623)
successes <- c(127, 147)

# Rates with normal-approximation 95% limits
# (estimate -/+ 1.96 standard errors)
proportions <- successes / trials
stderr <- sqrt(proportions * (1 - proportions) / trials)
half_width <- 1.96 * stderr
ci_low <- proportions - half_width
ci_high <- proportions + half_width

# Plotting table; Label_y puts each value label above its upper CI limit,
# offset by 10% of the larger proportion
df <- data.frame(
  Group = groups,
  Proportion = proportions,
  CI_low = ci_low,
  CI_high = ci_high,
  Label_y = ci_high + 0.1 * max(proportions)
)

# Vertical layout of the annotation: bracket 15% above the highest label,
# p-value text a fixed 0.0005 above the bracket, axis limit 5% above the text
sig_y <- 1.15 * max(df$Label_y)
text_y <- sig_y + 0.0005
y_max <- 1.05 * text_y

# Plot: rate bars with CI whiskers, percentage labels, and a significance
# bracket positioned from sig_y / text_y
ggplot(df, aes(Group, Proportion)) +
  geom_col(aes(fill = Group), width = 0.6, alpha = 0.4, show.legend = FALSE) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2) +
  # Value labels are shown as percentages with three decimals
  geom_text(
    aes(y = Label_y, label = sprintf("%.3f%%", Proportion * 100)),
    size = 4
  ) +
  scale_fill_manual(
    values = c("Control" = "steelblue", "Inoculation" = "tomato")
  ) +
  scale_y_continuous(limits = c(0, y_max)) +
  # Bracket: horizontal bar, then left and right ticks
  annotate("segment", x = 1, xend = 2, y = sig_y, yend = sig_y, size = 0.5) +
  annotate("segment", x = 1, xend = 1, y = sig_y, yend = sig_y - 0.0003, size = 0.5) +
  annotate("segment", x = 2, xend = 2, y = sig_y, yend = sig_y - 0.0003, size = 0.5) +
  # p-value text centred over the bracket
  annotate("text", x = 1.5, y = text_y, label = "p < 0.001", size = 5, fontface = "bold") +
  labs(y = "Click-Through Rate (CTR)") +
  theme_minimal(base_size = 14) +
  # Drop all grid lines
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Click behavior follow-up

# Two-sample test of equal proportions on the raw counts
# (x = successes, n = trials per group), without continuity correction.
res <- prop.test(x = c(166, 108), n = c(163555, 39354), correct = FALSE)
res

# Take the group proportions from the test result (x / n) instead of
# retyping rounded values, so the effect size always matches the counts.
p1 <- res$estimate[[1]]
p2 <- res$estimate[[2]]

# Compute Cohen's h: difference between the arcsine-transformed proportions,
# h = 2 * asin(sqrt(p1)) - 2 * asin(sqrt(p2)).
# The sign follows the group order: negative when p1 < p2.
cohen_h <- 2 * (asin(sqrt(p1)) - asin(sqrt(p2)))

# Print result
print(cohen_h)

# Create plot for click behavior T2

# Counts per group, listed in the order Control, Inoculation
groups <- c("Control", "Inoculation")
trials <- c(163555, 39354)
successes <- c(166, 108)

# Rates with normal-approximation 95% limits
# (estimate -/+ 1.96 standard errors)
proportions <- successes / trials
stderr <- sqrt(proportions * (1 - proportions) / trials)
half_width <- 1.96 * stderr
ci_low <- proportions - half_width
ci_high <- proportions + half_width

# Plotting table; Label_y puts each value label above its upper CI limit,
# offset by 10% of the larger proportion
df <- data.frame(
  Group = groups,
  Proportion = proportions,
  CI_low = ci_low,
  CI_high = ci_high,
  Label_y = ci_high + 0.1 * max(proportions)
)

# Vertical layout of the annotation: bracket 15% above the highest label,
# p-value text a fixed 0.0005 above the bracket, axis limit 5% above the text
sig_y <- 1.15 * max(df$Label_y)
text_y <- sig_y + 0.0005
y_max <- 1.05 * text_y

# Plot: rate bars with CI whiskers, percentage labels, and a significance
# bracket positioned from sig_y / text_y
ggplot(df, aes(Group, Proportion)) +
  geom_col(aes(fill = Group), width = 0.6, alpha = 0.4, show.legend = FALSE) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2) +
  # Value labels are shown as percentages with three decimals
  geom_text(
    aes(y = Label_y, label = sprintf("%.3f%%", Proportion * 100)),
    size = 4
  ) +
  scale_fill_manual(
    values = c("Control" = "steelblue", "Inoculation" = "tomato")
  ) +
  scale_y_continuous(limits = c(0, y_max)) +
  # Bracket: horizontal bar, then left and right ticks
  annotate("segment", x = 1, xend = 2, y = sig_y, yend = sig_y, size = 0.5) +
  annotate("segment", x = 1, xend = 1, y = sig_y, yend = sig_y - 0.0003, size = 0.5) +
  annotate("segment", x = 2, xend = 2, y = sig_y, yend = sig_y - 0.0003, size = 0.5) +
  # p-value text centred over the bracket
  annotate("text", x = 1.5, y = text_y, label = "p < 0.001", size = 5, fontface = "bold") +
  labs(y = "Click-Through Rate (CTR)") +
  theme_minimal(base_size = 14) +
  # Drop all grid lines
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )